##Setup 
#####
if(T == T){ 
rm(list=ls())
rce_indicator = F
#library("FNN")
#library("mvtnorm")
#library("Rsolnp")
#library("clusterGeneration")
if(rce_indicator == F){ setwd("~/Dropbox/AWS Infrastructure") } 
if(rce_indicator==T){
  .libPaths(unique(c(.libPaths(), 
                     "/usr/lib64/R/library", "/usr/share/R/library", 
                     "/nfs/home/C/cjerzak/.R/library-x86_64/")))
  mainDir <- "~/shared_space/ps_ctj"
  args <- commandArgs(TRUE); ijack <- try(as.integer(args[1]) + 1  , T);
}

readme_files <- c("readme2_helper_fxns.R", "readme2_core.R","readme2_causal_battery_helpers.R")
for(readme_file in readme_files){ print(readme_file); source(sprintf("./Readme2_Code/%s", readme_file)) }

rm(list=c("knn_adapt","EnsembleMethod",
          "readme_est_fxn","undergrad","readme"))

resave_files <- F
if(resave_files == T){ 
    my_scripts <- list.files("./Experiments Data/Semisynthetic Experiments/Good Experiments")
    for(my_script in my_scripts){
      print( my_script ) 
      source(sprintf("./Experiments Data/Semisynthetic Experiments/Good Experiments/%s", my_script))  
    }
} 


my_datasets <- list.files("./Experiments Data/Semisynthetic Experiments/Saved Datasets")
if(rce_indicator == F){ 
  my_datasets <- my_datasets[sample(1:length(my_datasets), length(my_datasets))]
  n_replications <- 10
} 
if(rce_indicator == T){ n_replications <- 100  } 
synthetic_treatment_effect <- 0
return_mat <- c()
for(ijack in 1:length( my_datasets) ){
#if(T == T){ 
     my_dataset <- my_datasets[ijack]
     print( my_dataset )
     my_data <- read.csv(sprintf("./Experiments Data/Semisynthetic Experiments/Saved Datasets/%s", my_dataset))[,-1]
     my_data <- as.data.frame( apply(my_data, 2, function(x) f2n(x)))
     Y_obs <- my_data$Y_obs
     
     if(length(  table(Y_obs) )  > 0){ 
     
     true_treatment_indicator <- my_data$true_treatment_indicator 
     full_X_raw <- my_data[,!colnames(my_data) %in% c("Y_obs", "true_treatment_indicator") ]

     nonMissing_indices <- !is.na(Y_obs) & !is.na(rowSums(full_X_raw))
     
     #the big three 
     full_X_use <- as.matrix( scale( full_X_raw[nonMissing_indices,] )  ) ; full_X_use <- full_X_use[,which(apply(full_X_use, 2, sd)>0)]
     Y_obs <- Y_obs[nonMissing_indices]
     true_treatment_indicator <- true_treatment_indicator[nonMissing_indices]

     synthetic_treatment_effect_seq  <- 0#0.10 * sd(Y_obs)#
     inner_counter <- 0 
     for(synthetic_treatment_effect in synthetic_treatment_effect_seq ){ 
       inner_counter <- inner_counter + 1 
       full_X = full_X_use; typePropensity = "forest"; max_n <- 1000 
       my_savename <- paste("Results___", gsub(my_dataset, pattern = "\\.", replace = "_"), sep = "")
       x <- try(semisynthetic_testor(full_X = full_X, 
                                    Y_obs = Y_obs, 
                                    true_treatment_indicator = true_treatment_indicator, 
                                    typePropensity = typePropensity, 
                                    n_replications = n_replications, 
                                    max_n = max_n, 
                                    synthetic_treatment_effect = synthetic_treatment_effect, 
                                    saveName = my_savename ), T)
       if(class(x ) == "try-error"){browser()}
     } 
     }
} 
}


if(T == F){ 
#load in data 
return_mat <- c() 
for(my_dataset in my_datasets){ 
  my_savename <- paste("Results___", gsub(my_dataset, pattern = "\\.", replace = "_"), sep = "")
  AbsErrorMat_full <- try(read.csv(sprintf("./semisynthetic_exp_%s.csv", my_savename))[,-1], T) 
  if(class(AbsErrorMat_full) != "try-error"){
    #mse_vec <- colMeans(AbsErrorMat_full)
    #mse_vec <- colMeans(t(apply(AbsErrorMat_full, 1, function(x){x <= x[1]})))
    which_compare <- which(colnames(AbsErrorMat_full) == "readme2_est")
    mse_vec <- colMeans(t(apply(AbsErrorMat_full, 1, function(x){x[which_compare] <= x})))
    
    meta_vec <- c(my_dataset, nrow(full_X), length(unique(Y_obs)), 
                  synthetic_treatment_effect, n_replications)
    names(meta_vec) <- c("dataset", "n", "unique_Yobs", "synthetic_treatment_effect", "n_replications")
    return_mat <- rbind(return_mat, c(meta_vec, unlist(mse_vec)) )
  } 
} 
return_mat_orig <- return_mat

return_mat <- as.data.frame( return_mat_orig )
return_mat[,-c(1)] <- apply(return_mat[,-c(1)], 2, function(x) f2n(x))
sort(  colMeans(return_mat[,-c(1:5)], na.rm = T) , decreasing = T) 

pdf("~/Downloads/causal.pdf")
if(T == T){ 
par(mar=c(12, 5, 0.75, 2) )
return_mat$dataset_names[return_mat$dataset == "my_data_1.csv"] <- 'Bolsen et al. (2013)'#check 
return_mat$dataset_names[return_mat$dataset == "my_data_2.csv"] <- 'Gerber & Green (2000)' # check 
return_mat$dataset_names[return_mat$dataset == "my_data_3.csv"] <- 'Callen et al. (2014)'
return_mat$dataset_names[return_mat$dataset == "my_data_5.csv"] <- 'McClendon & Riedl (2015)'#check 
return_mat$dataset_names[return_mat$dataset == "my_data_6.csv"] <- 'Taylor et al. (2012)'#check 
return_mat$dataset_names[return_mat$dataset == "my_data_7.csv"] <- 'Finkelstein & Baicker (2014)'#check 
return_mat$dataset_names[return_mat$dataset == "my_data_8.csv"] <- 'Enos & Fowler (2016)'#check 
return_mat$dataset_names[return_mat$dataset == "my_data_9.csv"] <- 'Bailey et al. (2016)'#check 
return_mat$dataset_names[return_mat$dataset == "my_data_10.csv"] <- 'Kugler et al. (2015)'#check 
return_mat <- return_mat[ order(sapply(as.character(return_mat$dataset_names), function(j){ regmatches(j, gregexpr("(?<=\\().*?(?=\\))", j, perl=T))[[1]] } ) ),]
plot( return_mat$predictive_mean_est, log = "",
      cex.lab = 1.5, ylim = c(0.01,1),xlim=c(-1,length(unique(return_mat$dataset))) ,
      type = "b", pch = 1, col = "black", lty = 1, 
      ylab = "Proportion of Datasets with Higher SAE",xaxt = "n", xlab = "") 
abline(h=0.5, lwd = 2, lty = 2)
points( return_mat$euclid_est, type = "b", pch = 2, col = "purple", lty = 2)
points( return_mat$psm_est, type = "b", pch = 3, col = "gray", lty = 3)
points( return_mat$embeddings_est, type = "b", pch = 4, col = "orange", lty = 4)
axis(1, at=1:nrow(return_mat), labels=return_mat$dataset_names, las = 2,cex.axis =0.85)
axis(1, at=ceiling(nrow(return_mat)/2), labels="Dataset Citation", padj = 8,cex.axis=2)
legend("bottomright", 
       pch = c(1,2,3,4),
       col =c("black", "purple", "gray", "orange"),
       lty = c(1,2,3,4),
       legend = c("Predictive Mean", "Raw Features", "Propensity Scores", "Random Projections"), 
       bty = "n")
abline(h=0.5, lwd = 2, lty = 2)
text(x=0,y=0.60,labels = "higher SAE than \n readme2 features",cex = 0.80)
text(x=0,y=0.40,labels = "lower SAE than \n readme2 features",cex = 0.80)
} 
dev.off() 

} 